"""
Functions shared between data processing scripts
"""

import sys
import operator

def read_mapping(mapping_file):
    """ Read (gene ID) -> (gene symbol) mapping file into a dict.

    Each non-blank line is split on whitespace; the first field becomes the
    key and the second field becomes the value. Any further fields on the
    line are ignored, and if a key occurs more than once the last occurrence
    wins.

    mapping_file: path of the mapping file to read

    Returns a dict mapping (first field) -> (second field).

    Exits through sys.exit() if the file cannot be opened. Raises IndexError
    if a non-blank line has fewer than two fields.
    """
    try:
        fp = open(mapping_file, "r")
    except IOError:
        sys.exit("Error opening file {}".format(mapping_file))

    mapping_hash = {}
    # the context manager closes the file even if a line fails to parse
    with fp:
        for line in fp:
            info = line.split()
            if not info:
                # blank lines (e.g. a trailing newline) carry no mapping
                continue
            mapping_hash[info[0]] = info[1]

    return mapping_hash


def read_seed(seed_file, seed_number, seed_hash):
    """ Read a seed file and store its entries in seed_hash[seed_number].

    Each non-blank line is split on whitespace and its second field is
    appended, in file order, to a fresh list stored under seed_number. Any
    list previously stored under that key is replaced.

    seed_file: path of the seed file to read
    seed_number: key under which this seed's entries are stored
    seed_hash: dict mapping (seed number) -> (list of entries); it is
               modified in place

    Returns the same seed_hash object that was passed in.

    Exits through sys.exit() if the file cannot be opened (seed_hash is left
    untouched in that case). Raises IndexError if a non-blank line has fewer
    than two fields.
    """
    try:
        fp = open(seed_file, "r")
    except IOError:
        sys.exit("Error opening file {}".format(seed_file))

    entries = []
    seed_hash[seed_number] = entries
    # the context manager closes the file even if a line fails to parse
    with fp:
        for line in fp:
            info = line.split()
            if not info:
                # blank lines (e.g. a trailing newline) carry no entry
                continue
            entries.append(info[1])

    return seed_hash


def read_rwr(prediction_file, seed_hash, seed_number):
    """ Read RWR results file (generated by multi_matrix.py) into a dict.

    Every non-blank line, stripped of surrounding whitespace, is treated as
    one result. Results are numbered from 1 in file order, counting only the
    lines that are kept.

    NOTE that this function also removes results that are in the current
    seed (since they aren't informative, for our purposes). Removed results
    do not consume a ranking number.

    prediction_file: path of the RWR results file to read
    seed_hash: maps (seed number) -> (collection of entries in that seed)
    seed_number: key of the current seed in seed_hash

    Returns a dict mapping (result) -> (ranking number). If a result appears
    more than once, its last ranking number is kept.

    Exits through sys.exit() if the file cannot be opened. Raises KeyError
    if seed_number is not a key of seed_hash.
    """
    try:
        fp = open(prediction_file, "r")
    except IOError:
        sys.exit("Error opening file {}".format(prediction_file))

    ranking_hash = {}
    with fp:
        # build the lookup once so each membership test is O(1)
        # for now, only the current seed is filtered out
        # this should probably be generalized somehow
        current_seed = set(seed_hash[seed_number])

        # start rankings from 1, rather than 0
        index = 1
        for line in fp:
            entry = line.strip()
            if not entry or entry in current_seed:
                # blank lines are not results; seed members are uninformative
                continue
            ranking_hash[entry] = index
            index += 1

    return ranking_hash
